{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T11:58:08.091300Z",
     "start_time": "2020-12-14T11:58:06.426954Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T11:58:51.734328Z",
     "start_time": "2020-12-14T11:58:51.636539Z"
    }
   },
   "outputs": [],
   "source": [
    "data = pd.read_csv('tmdb_5000_movies.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T12:00:27.891095Z",
     "start_time": "2020-12-14T12:00:27.871172Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 4803 entries, 0 to 4802\n",
      "Data columns (total 20 columns):\n",
      " #   Column                Non-Null Count  Dtype  \n",
      "---  ------                --------------  -----  \n",
      " 0   budget                4803 non-null   int64  \n",
      " 1   genres                4803 non-null   object \n",
      " 2   homepage              1712 non-null   object \n",
      " 3   id                    4803 non-null   int64  \n",
      " 4   keywords              4803 non-null   object \n",
      " 5   original_language     4803 non-null   object \n",
      " 6   original_title        4803 non-null   object \n",
      " 7   overview              4800 non-null   object \n",
      " 8   popularity            4803 non-null   float64\n",
      " 9   production_companies  4803 non-null   object \n",
      " 10  production_countries  4803 non-null   object \n",
      " 11  release_date          4802 non-null   object \n",
      " 12  revenue               4803 non-null   int64  \n",
      " 13  runtime               4801 non-null   float64\n",
      " 14  spoken_languages      4803 non-null   object \n",
      " 15  status                4803 non-null   object \n",
      " 16  tagline               3959 non-null   object \n",
      " 17  title                 4803 non-null   object \n",
      " 18  vote_average          4803 non-null   float64\n",
      " 19  vote_count            4803 non-null   int64  \n",
      "dtypes: float64(3), int64(4), object(13)\n",
      "memory usage: 750.6+ KB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T12:03:38.496022Z",
     "start_time": "2020-12-14T12:03:38.467355Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>id</th>\n",
       "      <th>popularity</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4.803000e+03</td>\n",
       "      <td>4803.000000</td>\n",
       "      <td>4803.000000</td>\n",
       "      <td>4.803000e+03</td>\n",
       "      <td>4801.000000</td>\n",
       "      <td>4803.000000</td>\n",
       "      <td>4803.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.904504e+07</td>\n",
       "      <td>57165.484281</td>\n",
       "      <td>21.492301</td>\n",
       "      <td>8.226064e+07</td>\n",
       "      <td>106.875859</td>\n",
       "      <td>6.092172</td>\n",
       "      <td>690.217989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>4.072239e+07</td>\n",
       "      <td>88694.614033</td>\n",
       "      <td>31.816650</td>\n",
       "      <td>1.628571e+08</td>\n",
       "      <td>22.611935</td>\n",
       "      <td>1.194612</td>\n",
       "      <td>1234.585891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>7.900000e+05</td>\n",
       "      <td>9014.500000</td>\n",
       "      <td>4.668070</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>94.000000</td>\n",
       "      <td>5.600000</td>\n",
       "      <td>54.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.500000e+07</td>\n",
       "      <td>14629.000000</td>\n",
       "      <td>12.921594</td>\n",
       "      <td>1.917000e+07</td>\n",
       "      <td>103.000000</td>\n",
       "      <td>6.200000</td>\n",
       "      <td>235.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.000000e+07</td>\n",
       "      <td>58610.500000</td>\n",
       "      <td>28.313505</td>\n",
       "      <td>9.291719e+07</td>\n",
       "      <td>118.000000</td>\n",
       "      <td>6.800000</td>\n",
       "      <td>737.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3.800000e+08</td>\n",
       "      <td>459488.000000</td>\n",
       "      <td>875.581305</td>\n",
       "      <td>2.787965e+09</td>\n",
       "      <td>338.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>13752.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             budget             id   popularity       revenue      runtime  \\\n",
       "count  4.803000e+03    4803.000000  4803.000000  4.803000e+03  4801.000000   \n",
       "mean   2.904504e+07   57165.484281    21.492301  8.226064e+07   106.875859   \n",
       "std    4.072239e+07   88694.614033    31.816650  1.628571e+08    22.611935   \n",
       "min    0.000000e+00       5.000000     0.000000  0.000000e+00     0.000000   \n",
       "25%    7.900000e+05    9014.500000     4.668070  0.000000e+00    94.000000   \n",
       "50%    1.500000e+07   14629.000000    12.921594  1.917000e+07   103.000000   \n",
       "75%    4.000000e+07   58610.500000    28.313505  9.291719e+07   118.000000   \n",
       "max    3.800000e+08  459488.000000   875.581305  2.787965e+09   338.000000   \n",
       "\n",
       "       vote_average    vote_count  \n",
       "count   4803.000000   4803.000000  \n",
       "mean       6.092172    690.217989  \n",
       "std        1.194612   1234.585891  \n",
       "min        0.000000      0.000000  \n",
       "25%        5.600000     54.000000  \n",
       "50%        6.200000    235.000000  \n",
       "75%        6.800000    737.000000  \n",
       "max       10.000000  13752.000000  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T12:54:05.294291Z",
     "start_time": "2020-12-14T12:54:05.290781Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4803, 20)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T12:54:12.929043Z",
     "start_time": "2020-12-14T12:54:12.923666Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "budget                    int64\n",
       "genres                   object\n",
       "homepage                 object\n",
       "id                        int64\n",
       "keywords                 object\n",
       "original_language        object\n",
       "original_title           object\n",
       "overview                 object\n",
       "popularity              float64\n",
       "production_companies     object\n",
       "production_countries     object\n",
       "release_date             object\n",
       "revenue                   int64\n",
       "runtime                 float64\n",
       "spoken_languages         object\n",
       "status                   object\n",
       "tagline                  object\n",
       "title                    object\n",
       "vote_average            float64\n",
       "vote_count                int64\n",
       "dtype: object"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T12:54:30.873003Z",
     "start_time": "2020-12-14T12:54:30.857790Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>237000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.avatarmovie.com/</td>\n",
       "      <td>19995</td>\n",
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>150.437577</td>\n",
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2009-12-10</td>\n",
       "      <td>2787965087</td>\n",
       "      <td>162.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>Enter the World of Pandora.</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>7.2</td>\n",
       "      <td>11800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>300000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
       "      <td>http://disney.go.com/disneypictures/pirates/</td>\n",
       "      <td>285</td>\n",
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
       "      <td>en</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>139.082615</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2007-05-19</td>\n",
       "      <td>961000000</td>\n",
       "      <td>169.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>At the end of the world, the adventure begins.</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>6.9</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>245000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.sonypictures.com/movies/spectre/</td>\n",
       "      <td>206647</td>\n",
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
       "      <td>en</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>107.376788</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
       "      <td>[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...</td>\n",
       "      <td>2015-10-26</td>\n",
       "      <td>880674609</td>\n",
       "      <td>148.0</td>\n",
       "      <td>[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...</td>\n",
       "      <td>Released</td>\n",
       "      <td>A Plan No One Escapes</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>6.3</td>\n",
       "      <td>4466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>250000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>http://www.thedarkknightrises.com/</td>\n",
       "      <td>49026</td>\n",
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
       "      <td>en</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>112.312950</td>\n",
       "      <td>[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-07-16</td>\n",
       "      <td>1084939099</td>\n",
       "      <td>165.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>The Legend Ends</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>7.6</td>\n",
       "      <td>9106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>260000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://movies.disney.com/john-carter</td>\n",
       "      <td>49529</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>43.926995</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-03-07</td>\n",
       "      <td>284139100</td>\n",
       "      <td>132.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Lost in our world, found in another.</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2124</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      budget                                             genres  \\\n",
       "0  237000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "1  300000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
       "2  245000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "3  250000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
       "4  260000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "\n",
       "                                       homepage      id  \\\n",
       "0                   http://www.avatarmovie.com/   19995   \n",
       "1  http://disney.go.com/disneypictures/pirates/     285   \n",
       "2   http://www.sonypictures.com/movies/spectre/  206647   \n",
       "3            http://www.thedarkknightrises.com/   49026   \n",
       "4          http://movies.disney.com/john-carter   49529   \n",
       "\n",
       "                                            keywords original_language  \\\n",
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...                en   \n",
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...                en   \n",
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...                en   \n",
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...                en   \n",
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
       "\n",
       "                             original_title  \\\n",
       "0                                    Avatar   \n",
       "1  Pirates of the Caribbean: At World's End   \n",
       "2                                   Spectre   \n",
       "3                     The Dark Knight Rises   \n",
       "4                               John Carter   \n",
       "\n",
       "                                            overview  popularity  \\\n",
       "0  In the 22nd century, a paraplegic Marine is di...  150.437577   \n",
       "1  Captain Barbossa, long believed to be dead, ha...  139.082615   \n",
       "2  A cryptic message from Bond’s past sends him o...  107.376788   \n",
       "3  Following the death of District Attorney Harve...  112.312950   \n",
       "4  John Carter is a war-weary, former military ca...   43.926995   \n",
       "\n",
       "                                production_companies  \\\n",
       "0  [{\"name\": \"Ingenious Film Partners\", \"id\": 289...   \n",
       "1  [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...   \n",
       "2  [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...   \n",
       "3  [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...   \n",
       "4        [{\"name\": \"Walt Disney Pictures\", \"id\": 2}]   \n",
       "\n",
       "                                production_countries release_date     revenue  \\\n",
       "0  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2009-12-10  2787965087   \n",
       "1  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2007-05-19   961000000   \n",
       "2  [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...   2015-10-26   880674609   \n",
       "3  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-07-16  1084939099   \n",
       "4  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-03-07   284139100   \n",
       "\n",
       "   runtime                                   spoken_languages    status  \\\n",
       "0    162.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...  Released   \n",
       "1    169.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "2    148.0  [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...  Released   \n",
       "3    165.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "4    132.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
       "\n",
       "                                          tagline  \\\n",
       "0                     Enter the World of Pandora.   \n",
       "1  At the end of the world, the adventure begins.   \n",
       "2                           A Plan No One Escapes   \n",
       "3                                 The Legend Ends   \n",
       "4            Lost in our world, found in another.   \n",
       "\n",
       "                                      title  vote_average  vote_count  \n",
       "0                                    Avatar           7.2       11800  \n",
       "1  Pirates of the Caribbean: At World's End           6.9        4500  \n",
       "2                                   Spectre           6.3        4466  \n",
       "3                     The Dark Knight Rises           7.6        9106  \n",
       "4                               John Carter           6.1        2124  "
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据缺失\n",
    "- homepage              1712 non-null   object \n",
    "- release_date          4802 non-null   object \n",
    "- runtime               4801 non-null   float64\n",
    "- tagline               3959 non-null   object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T13:52:32.388551Z",
     "start_time": "2020-12-14T13:52:32.360919Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>225000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 10751...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2454</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>The Chronicles of Narnia: Prince Caspian</td>\n",
       "      <td>One year after their incredible adventures in ...</td>\n",
       "      <td>53.978602</td>\n",
       "      <td>[{\"name\": \"Walt Disney\", \"id\": 5888}, {\"name\":...</td>\n",
       "      <td>[{\"iso_3166_1\": \"CZ\", \"name\": \"Czech Republic\"...</td>\n",
       "      <td>2008-05-15</td>\n",
       "      <td>419651413</td>\n",
       "      <td>150.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Hope has a new face.</td>\n",
       "      <td>The Chronicles of Narnia: Prince Caspian</td>\n",
       "      <td>6.3</td>\n",
       "      <td>1630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>207000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 18, \"...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>254</td>\n",
       "      <td>[{\"id\": 774, \"name\": \"film business\"}, {\"id\": ...</td>\n",
       "      <td>en</td>\n",
       "      <td>King Kong</td>\n",
       "      <td>In 1933 New York, an overly ambitious movie pr...</td>\n",
       "      <td>61.226010</td>\n",
       "      <td>[{\"name\": \"WingNut Films\", \"id\": 11}, {\"name\":...</td>\n",
       "      <td>[{\"iso_3166_1\": \"NZ\", \"name\": \"New Zealand\"}, ...</td>\n",
       "      <td>2005-12-14</td>\n",
       "      <td>550000000</td>\n",
       "      <td>187.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>The eighth wonder of the world.</td>\n",
       "      <td>King Kong</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>209000000</td>\n",
       "      <td>[{\"id\": 53, \"name\": \"Thriller\"}, {\"id\": 28, \"n...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>44833</td>\n",
       "      <td>[{\"id\": 1721, \"name\": \"fight\"}, {\"id\": 4410, \"...</td>\n",
       "      <td>en</td>\n",
       "      <td>Battleship</td>\n",
       "      <td>When mankind beams a radio signal into space, ...</td>\n",
       "      <td>64.928382</td>\n",
       "      <td>[{\"name\": \"Universal Pictures\", \"id\": 33}, {\"n...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-04-11</td>\n",
       "      <td>303025485</td>\n",
       "      <td>131.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>The Battle for Earth Begins at Sea</td>\n",
       "      <td>Battleship</td>\n",
       "      <td>5.5</td>\n",
       "      <td>2114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>210000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 28, \"...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36668</td>\n",
       "      <td>[{\"id\": 1852, \"name\": \"mutant\"}, {\"id\": 8828, ...</td>\n",
       "      <td>en</td>\n",
       "      <td>X-Men: The Last Stand</td>\n",
       "      <td>When a cure is found to treat mutations, lines...</td>\n",
       "      <td>3.857526</td>\n",
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
       "      <td>[{\"iso_3166_1\": \"CA\", \"name\": \"Canada\"}, {\"iso...</td>\n",
       "      <td>2006-05-24</td>\n",
       "      <td>459359555</td>\n",
       "      <td>104.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Take a Stand</td>\n",
       "      <td>X-Men: The Last Stand</td>\n",
       "      <td>6.3</td>\n",
       "      <td>3525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>200000000</td>\n",
       "      <td>[{\"id\": 16, \"name\": \"Animation\"}, {\"id\": 10751...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>62211</td>\n",
       "      <td>[{\"id\": 1299, \"name\": \"monster\"}, {\"id\": 5984,...</td>\n",
       "      <td>en</td>\n",
       "      <td>Monsters University</td>\n",
       "      <td>A look at the relationship between Mike and Su...</td>\n",
       "      <td>89.186492</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2013-06-20</td>\n",
       "      <td>743559607</td>\n",
       "      <td>104.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>School never looked this scary.</td>\n",
       "      <td>Monsters University</td>\n",
       "      <td>7.0</td>\n",
       "      <td>3528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4795</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 18, \"name\": \"Drama\"}]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>124606</td>\n",
       "      <td>[{\"id\": 10726, \"name\": \"gang\"}, {\"id\": 33928, ...</td>\n",
       "      <td>en</td>\n",
       "      <td>Bang</td>\n",
       "      <td>A young woman in L.A. is having a bad day: she...</td>\n",
       "      <td>0.918116</td>\n",
       "      <td>[{\"name\": \"Asylum Films\", \"id\": 10571}, {\"name...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>1995-09-09</td>\n",
       "      <td>0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Sometimes you've got to break the rules</td>\n",
       "      <td>Bang</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4797</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 10769, \"name\": \"Foreign\"}, {\"id\": 53, ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>67238</td>\n",
       "      <td>[]</td>\n",
       "      <td>en</td>\n",
       "      <td>Cavite</td>\n",
       "      <td>Adam, a security guard, travels from Californi...</td>\n",
       "      <td>0.022173</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>2005-03-12</td>\n",
       "      <td>0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>[]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cavite</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4798</th>\n",
       "      <td>220000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9367</td>\n",
       "      <td>[{\"id\": 5616, \"name\": \"united states\\u2013mexi...</td>\n",
       "      <td>es</td>\n",
       "      <td>El Mariachi</td>\n",
       "      <td>El Mariachi just wants to play his guitar and ...</td>\n",
       "      <td>14.269792</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"MX\", \"name\": \"Mexico\"}, {\"iso...</td>\n",
       "      <td>1992-09-04</td>\n",
       "      <td>2040920</td>\n",
       "      <td>81.0</td>\n",
       "      <td>[{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>He didn't come looking for trouble, but troubl...</td>\n",
       "      <td>El Mariachi</td>\n",
       "      <td>6.6</td>\n",
       "      <td>238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4799</th>\n",
       "      <td>9000</td>\n",
       "      <td>[{\"id\": 35, \"name\": \"Comedy\"}, {\"id\": 10749, \"...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>72766</td>\n",
       "      <td>[]</td>\n",
       "      <td>en</td>\n",
       "      <td>Newlyweds</td>\n",
       "      <td>A newlywed couple's honeymoon is upended by th...</td>\n",
       "      <td>0.642552</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>2011-12-26</td>\n",
       "      <td>0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>[]</td>\n",
       "      <td>Released</td>\n",
       "      <td>A newlywed couple's honeymoon is upended by th...</td>\n",
       "      <td>Newlyweds</td>\n",
       "      <td>5.9</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4802</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 99, \"name\": \"Documentary\"}]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25975</td>\n",
       "      <td>[{\"id\": 1523, \"name\": \"obsession\"}, {\"id\": 224...</td>\n",
       "      <td>en</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>Ever since the second grade when he first saw ...</td>\n",
       "      <td>1.929883</td>\n",
       "      <td>[{\"name\": \"rusty bear entertainment\", \"id\": 87...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2005-08-05</td>\n",
       "      <td>0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>6.3</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3091 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         budget                                             genres homepage  \\\n",
       "15    225000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 10751...      NaN   \n",
       "24    207000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 18, \"...      NaN   \n",
       "27    209000000  [{\"id\": 53, \"name\": \"Thriller\"}, {\"id\": 28, \"n...      NaN   \n",
       "33    210000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 28, \"...      NaN   \n",
       "34    200000000  [{\"id\": 16, \"name\": \"Animation\"}, {\"id\": 10751...      NaN   \n",
       "...         ...                                                ...      ...   \n",
       "4795          0                      [{\"id\": 18, \"name\": \"Drama\"}]      NaN   \n",
       "4797          0  [{\"id\": 10769, \"name\": \"Foreign\"}, {\"id\": 53, ...      NaN   \n",
       "4798     220000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...      NaN   \n",
       "4799       9000  [{\"id\": 35, \"name\": \"Comedy\"}, {\"id\": 10749, \"...      NaN   \n",
       "4802          0                [{\"id\": 99, \"name\": \"Documentary\"}]      NaN   \n",
       "\n",
       "          id                                           keywords  \\\n",
       "15      2454  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...   \n",
       "24       254  [{\"id\": 774, \"name\": \"film business\"}, {\"id\": ...   \n",
       "27     44833  [{\"id\": 1721, \"name\": \"fight\"}, {\"id\": 4410, \"...   \n",
       "33     36668  [{\"id\": 1852, \"name\": \"mutant\"}, {\"id\": 8828, ...   \n",
       "34     62211  [{\"id\": 1299, \"name\": \"monster\"}, {\"id\": 5984,...   \n",
       "...      ...                                                ...   \n",
       "4795  124606  [{\"id\": 10726, \"name\": \"gang\"}, {\"id\": 33928, ...   \n",
       "4797   67238                                                 []   \n",
       "4798    9367  [{\"id\": 5616, \"name\": \"united states\\u2013mexi...   \n",
       "4799   72766                                                 []   \n",
       "4802   25975  [{\"id\": 1523, \"name\": \"obsession\"}, {\"id\": 224...   \n",
       "\n",
       "     original_language                            original_title  \\\n",
       "15                  en  The Chronicles of Narnia: Prince Caspian   \n",
       "24                  en                                 King Kong   \n",
       "27                  en                                Battleship   \n",
       "33                  en                     X-Men: The Last Stand   \n",
       "34                  en                       Monsters University   \n",
       "...                ...                                       ...   \n",
       "4795                en                                      Bang   \n",
       "4797                en                                    Cavite   \n",
       "4798                es                               El Mariachi   \n",
       "4799                en                                 Newlyweds   \n",
       "4802                en                         My Date with Drew   \n",
       "\n",
       "                                               overview  popularity  \\\n",
       "15    One year after their incredible adventures in ...   53.978602   \n",
       "24    In 1933 New York, an overly ambitious movie pr...   61.226010   \n",
       "27    When mankind beams a radio signal into space, ...   64.928382   \n",
       "33    When a cure is found to treat mutations, lines...    3.857526   \n",
       "34    A look at the relationship between Mike and Su...   89.186492   \n",
       "...                                                 ...         ...   \n",
       "4795  A young woman in L.A. is having a bad day: she...    0.918116   \n",
       "4797  Adam, a security guard, travels from Californi...    0.022173   \n",
       "4798  El Mariachi just wants to play his guitar and ...   14.269792   \n",
       "4799  A newlywed couple's honeymoon is upended by th...    0.642552   \n",
       "4802  Ever since the second grade when he first saw ...    1.929883   \n",
       "\n",
       "                                   production_companies  \\\n",
       "15    [{\"name\": \"Walt Disney\", \"id\": 5888}, {\"name\":...   \n",
       "24    [{\"name\": \"WingNut Films\", \"id\": 11}, {\"name\":...   \n",
       "27    [{\"name\": \"Universal Pictures\", \"id\": 33}, {\"n...   \n",
       "33    [{\"name\": \"Ingenious Film Partners\", \"id\": 289...   \n",
       "34    [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...   \n",
       "...                                                 ...   \n",
       "4795  [{\"name\": \"Asylum Films\", \"id\": 10571}, {\"name...   \n",
       "4797                                                 []   \n",
       "4798           [{\"name\": \"Columbia Pictures\", \"id\": 5}]   \n",
       "4799                                                 []   \n",
       "4802  [{\"name\": \"rusty bear entertainment\", \"id\": 87...   \n",
       "\n",
       "                                   production_countries release_date  \\\n",
       "15    [{\"iso_3166_1\": \"CZ\", \"name\": \"Czech Republic\"...   2008-05-15   \n",
       "24    [{\"iso_3166_1\": \"NZ\", \"name\": \"New Zealand\"}, ...   2005-12-14   \n",
       "27    [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-04-11   \n",
       "33    [{\"iso_3166_1\": \"CA\", \"name\": \"Canada\"}, {\"iso...   2006-05-24   \n",
       "34    [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2013-06-20   \n",
       "...                                                 ...          ...   \n",
       "4795  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   1995-09-09   \n",
       "4797                                                 []   2005-03-12   \n",
       "4798  [{\"iso_3166_1\": \"MX\", \"name\": \"Mexico\"}, {\"iso...   1992-09-04   \n",
       "4799                                                 []   2011-12-26   \n",
       "4802  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2005-08-05   \n",
       "\n",
       "        revenue  runtime                                   spoken_languages  \\\n",
       "15    419651413    150.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "24    550000000    187.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "27    303025485    131.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...   \n",
       "33    459359555    104.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "34    743559607    104.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "...         ...      ...                                                ...   \n",
       "4795          0     98.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4797          0     80.0                                                 []   \n",
       "4798    2040920     81.0      [{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]   \n",
       "4799          0     85.0                                                 []   \n",
       "4802          0     90.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "\n",
       "        status                                            tagline  \\\n",
       "15    Released                               Hope has a new face.   \n",
       "24    Released                    The eighth wonder of the world.   \n",
       "27    Released                 The Battle for Earth Begins at Sea   \n",
       "33    Released                                       Take a Stand   \n",
       "34    Released                    School never looked this scary.   \n",
       "...        ...                                                ...   \n",
       "4795  Released            Sometimes you've got to break the rules   \n",
       "4797  Released                                                NaN   \n",
       "4798  Released  He didn't come looking for trouble, but troubl...   \n",
       "4799  Released  A newlywed couple's honeymoon is upended by th...   \n",
       "4802  Released                                                NaN   \n",
       "\n",
       "                                         title  vote_average  vote_count  \n",
       "15    The Chronicles of Narnia: Prince Caspian           6.3        1630  \n",
       "24                                   King Kong           6.6        2337  \n",
       "27                                  Battleship           5.5        2114  \n",
       "33                       X-Men: The Last Stand           6.3        3525  \n",
       "34                         Monsters University           7.0        3528  \n",
       "...                                        ...           ...         ...  \n",
       "4795                                      Bang           6.0           1  \n",
       "4797                                    Cavite           7.5           2  \n",
       "4798                               El Mariachi           6.6         238  \n",
       "4799                                 Newlyweds           5.9           5  \n",
       "4802                         My Date with Drew           6.3          16  \n",
       "\n",
       "[3091 rows x 20 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['homepage'].isna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T13:52:30.167907Z",
     "start_time": "2020-12-14T13:52:30.153963Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4553</th>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>380097</td>\n",
       "      <td>[]</td>\n",
       "      <td>en</td>\n",
       "      <td>America Is Still the Place</td>\n",
       "      <td>1971 post civil rights San Francisco seemed li...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>[]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>America Is Still the Place</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      budget genres homepage      id keywords original_language  \\\n",
       "4553       0     []      NaN  380097       []                en   \n",
       "\n",
       "                  original_title  \\\n",
       "4553  America Is Still the Place   \n",
       "\n",
       "                                               overview  popularity  \\\n",
       "4553  1971 post civil rights San Francisco seemed li...         0.0   \n",
       "\n",
       "     production_companies production_countries release_date  revenue  runtime  \\\n",
       "4553                   []                   []          NaN        0      0.0   \n",
       "\n",
       "     spoken_languages    status tagline                       title  \\\n",
       "4553               []  Released     NaN  America Is Still the Place   \n",
       "\n",
       "      vote_average  vote_count  \n",
       "4553           0.0           0  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['release_date'].isna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T13:52:46.896774Z",
     "start_time": "2020-12-14T13:52:46.884059Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2656</th>\n",
       "      <td>15000000</td>\n",
       "      <td>[{\"id\": 18, \"name\": \"Drama\"}]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>370980</td>\n",
       "      <td>[{\"id\": 717, \"name\": \"pope\"}, {\"id\": 5565, \"na...</td>\n",
       "      <td>it</td>\n",
       "      <td>Chiamatemi Francesco - Il Papa della gente</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.738646</td>\n",
       "      <td>[{\"name\": \"Taodue Film\", \"id\": 45724}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"IT\", \"name\": \"Italy\"}]</td>\n",
       "      <td>2015-12-03</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Chiamatemi Francesco - Il Papa della gente</td>\n",
       "      <td>7.3</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4140</th>\n",
       "      <td>2</td>\n",
       "      <td>[{\"id\": 99, \"name\": \"Documentary\"}]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>459488</td>\n",
       "      <td>[{\"id\": 6027, \"name\": \"music\"}, {\"id\": 225822,...</td>\n",
       "      <td>en</td>\n",
       "      <td>To Be Frank, Sinatra at 100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.050625</td>\n",
       "      <td>[{\"name\": \"Eyeline Entertainment\", \"id\": 60343}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"}]</td>\n",
       "      <td>2015-12-12</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>To Be Frank, Sinatra at 100</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        budget                               genres homepage      id  \\\n",
       "2656  15000000        [{\"id\": 18, \"name\": \"Drama\"}]      NaN  370980   \n",
       "4140         2  [{\"id\": 99, \"name\": \"Documentary\"}]      NaN  459488   \n",
       "\n",
       "                                               keywords original_language  \\\n",
       "2656  [{\"id\": 717, \"name\": \"pope\"}, {\"id\": 5565, \"na...                it   \n",
       "4140  [{\"id\": 6027, \"name\": \"music\"}, {\"id\": 225822,...                en   \n",
       "\n",
       "                                  original_title overview  popularity  \\\n",
       "2656  Chiamatemi Francesco - Il Papa della gente      NaN    0.738646   \n",
       "4140                 To Be Frank, Sinatra at 100      NaN    0.050625   \n",
       "\n",
       "                                  production_companies  \\\n",
       "2656            [{\"name\": \"Taodue Film\", \"id\": 45724}]   \n",
       "4140  [{\"name\": \"Eyeline Entertainment\", \"id\": 60343}]   \n",
       "\n",
       "                                  production_countries release_date  revenue  \\\n",
       "2656           [{\"iso_3166_1\": \"IT\", \"name\": \"Italy\"}]   2015-12-03        0   \n",
       "4140  [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"}]   2015-12-12        0   \n",
       "\n",
       "      runtime                               spoken_languages    status  \\\n",
       "2656      NaN  [{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]  Released   \n",
       "4140      NaN                                             []  Released   \n",
       "\n",
       "     tagline                                       title  vote_average  \\\n",
       "2656     NaN  Chiamatemi Francesco - Il Papa della gente           7.3   \n",
       "4140     NaN                 To Be Frank, Sinatra at 100           0.0   \n",
       "\n",
       "      vote_count  \n",
       "2656          12  \n",
       "4140           0  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['runtime'].isna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-14T13:53:27.331000Z",
     "start_time": "2020-12-14T13:53:27.306402Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>270000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
       "      <td>http://www.superman.com</td>\n",
       "      <td>1452</td>\n",
       "      <td>[{\"id\": 83, \"name\": \"saving the world\"}, {\"id\"...</td>\n",
       "      <td>en</td>\n",
       "      <td>Superman Returns</td>\n",
       "      <td>Superman returns to discover his 5-year absenc...</td>\n",
       "      <td>57.925623</td>\n",
       "      <td>[{\"name\": \"DC Comics\", \"id\": 429}, {\"name\": \"L...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2006-06-28</td>\n",
       "      <td>391081192</td>\n",
       "      <td>154.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Superman Returns</td>\n",
       "      <td>5.4</td>\n",
       "      <td>1400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>185000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.startrekmovie.com/</td>\n",
       "      <td>188927</td>\n",
       "      <td>[{\"id\": 9663, \"name\": \"sequel\"}, {\"id\": 9743, ...</td>\n",
       "      <td>en</td>\n",
       "      <td>Star Trek Beyond</td>\n",
       "      <td>The USS Enterprise crew explores the furthest ...</td>\n",
       "      <td>65.352913</td>\n",
       "      <td>[{\"name\": \"Paramount Pictures\", \"id\": 4}, {\"na...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2016-07-07</td>\n",
       "      <td>343471816</td>\n",
       "      <td>122.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Star Trek Beyond</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66</th>\n",
       "      <td>175000000</td>\n",
       "      <td>[{\"id\": 16, \"name\": \"Animation\"}, {\"id\": 35, \"...</td>\n",
       "      <td>http://disney.go.com/disneypictures/up/</td>\n",
       "      <td>14160</td>\n",
       "      <td>[{\"id\": 965, \"name\": \"age difference\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>Up</td>\n",
       "      <td>Carl Fredricksen spent his entire life dreamin...</td>\n",
       "      <td>92.201962</td>\n",
       "      <td>[{\"name\": \"Pixar Animation Studios\", \"id\": 3}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2009-05-13</td>\n",
       "      <td>735099082</td>\n",
       "      <td>96.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Up</td>\n",
       "      <td>7.7</td>\n",
       "      <td>6870</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>175000000</td>\n",
       "      <td>[{\"id\": 10751, \"name\": \"Family\"}, {\"id\": 12, \"...</td>\n",
       "      <td>http://movies.disney.com/the-jungle-book-2016</td>\n",
       "      <td>278927</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>The Jungle Book</td>\n",
       "      <td>After a threat from the tiger Shere Khan force...</td>\n",
       "      <td>94.199316</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...</td>\n",
       "      <td>2016-04-07</td>\n",
       "      <td>966550600</td>\n",
       "      <td>106.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Jungle Book</td>\n",
       "      <td>6.7</td>\n",
       "      <td>2892</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>150000000</td>\n",
       "      <td>[{\"id\": 53, \"name\": \"Thriller\"}, {\"id\": 9648, ...</td>\n",
       "      <td>http://www.angelsanddemons.com/</td>\n",
       "      <td>13448</td>\n",
       "      <td>[{\"id\": 588, \"name\": \"rome\"}, {\"id\": 716, \"nam...</td>\n",
       "      <td>en</td>\n",
       "      <td>Angels &amp; Demons</td>\n",
       "      <td>Harvard symbologist Robert Langdon investigate...</td>\n",
       "      <td>67.447636</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
       "      <td>[{\"iso_3166_1\": \"IT\", \"name\": \"Italy\"}, {\"iso_...</td>\n",
       "      <td>2009-05-13</td>\n",
       "      <td>356613439</td>\n",
       "      <td>138.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Angels &amp; Demons</td>\n",
       "      <td>6.5</td>\n",
       "      <td>2129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4790</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 18, \"name\": \"Drama\"}, {\"id\": 10769, \"n...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13898</td>\n",
       "      <td>[]</td>\n",
       "      <td>fa</td>\n",
       "      <td>دایره</td>\n",
       "      <td>Various women struggle to function in the oppr...</td>\n",
       "      <td>1.193779</td>\n",
       "      <td>[{\"name\": \"Jafar Panahi Film Productions\", \"id...</td>\n",
       "      <td>[{\"iso_3166_1\": \"IR\", \"name\": \"Iran\"}]</td>\n",
       "      <td>2000-09-08</td>\n",
       "      <td>0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>[{\"iso_639_1\": \"fa\", \"name\": \"\\u0641\\u0627\\u06...</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The Circle</td>\n",
       "      <td>6.6</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4794</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 53, \"name\": \"Thriller\"}, {\"id\": 27, \"n...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>286939</td>\n",
       "      <td>[]</td>\n",
       "      <td>en</td>\n",
       "      <td>Sanctuary: Quite a Conundrum</td>\n",
       "      <td>It should have been just a normal day of sex, ...</td>\n",
       "      <td>0.166513</td>\n",
       "      <td>[{\"name\": \"Gold Lion Films\", \"id\": 37870}, {\"n...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-01-20</td>\n",
       "      <td>0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sanctuary: Quite a Conundrum</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4797</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 10769, \"name\": \"Foreign\"}, {\"id\": 53, ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>67238</td>\n",
       "      <td>[]</td>\n",
       "      <td>en</td>\n",
       "      <td>Cavite</td>\n",
       "      <td>Adam, a security guard, travels from Californi...</td>\n",
       "      <td>0.022173</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>2005-03-12</td>\n",
       "      <td>0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>[]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Cavite</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4800</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 35, \"name\": \"Comedy\"}, {\"id\": 18, \"nam...</td>\n",
       "      <td>http://www.hallmarkchannel.com/signedsealeddel...</td>\n",
       "      <td>231617</td>\n",
       "      <td>[{\"id\": 248, \"name\": \"date\"}, {\"id\": 699, \"nam...</td>\n",
       "      <td>en</td>\n",
       "      <td>Signed, Sealed, Delivered</td>\n",
       "      <td>\"Signed, Sealed, Delivered\" introduces a dedic...</td>\n",
       "      <td>1.444476</td>\n",
       "      <td>[{\"name\": \"Front Street Pictures\", \"id\": 3958}...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2013-10-13</td>\n",
       "      <td>0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Signed, Sealed, Delivered</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4802</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 99, \"name\": \"Documentary\"}]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25975</td>\n",
       "      <td>[{\"id\": 1523, \"name\": \"obsession\"}, {\"id\": 224...</td>\n",
       "      <td>en</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>Ever since the second grade when he first saw ...</td>\n",
       "      <td>1.929883</td>\n",
       "      <td>[{\"name\": \"rusty bear entertainment\", \"id\": 87...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2005-08-05</td>\n",
       "      <td>0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>6.3</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>844 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         budget                                             genres  \\\n",
       "10    270000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
       "56    185000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "66    175000000  [{\"id\": 16, \"name\": \"Animation\"}, {\"id\": 35, \"...   \n",
       "78    175000000  [{\"id\": 10751, \"name\": \"Family\"}, {\"id\": 12, \"...   \n",
       "128   150000000  [{\"id\": 53, \"name\": \"Thriller\"}, {\"id\": 9648, ...   \n",
       "...         ...                                                ...   \n",
       "4790          0  [{\"id\": 18, \"name\": \"Drama\"}, {\"id\": 10769, \"n...   \n",
       "4794          0  [{\"id\": 53, \"name\": \"Thriller\"}, {\"id\": 27, \"n...   \n",
       "4797          0  [{\"id\": 10769, \"name\": \"Foreign\"}, {\"id\": 53, ...   \n",
       "4800          0  [{\"id\": 35, \"name\": \"Comedy\"}, {\"id\": 18, \"nam...   \n",
       "4802          0                [{\"id\": 99, \"name\": \"Documentary\"}]   \n",
       "\n",
       "                                               homepage      id  \\\n",
       "10                              http://www.superman.com    1452   \n",
       "56                        http://www.startrekmovie.com/  188927   \n",
       "66              http://disney.go.com/disneypictures/up/   14160   \n",
       "78        http://movies.disney.com/the-jungle-book-2016  278927   \n",
       "128                     http://www.angelsanddemons.com/   13448   \n",
       "...                                                 ...     ...   \n",
       "4790                                                NaN   13898   \n",
       "4794                                                NaN  286939   \n",
       "4797                                                NaN   67238   \n",
       "4800  http://www.hallmarkchannel.com/signedsealeddel...  231617   \n",
       "4802                                                NaN   25975   \n",
       "\n",
       "                                               keywords original_language  \\\n",
       "10    [{\"id\": 83, \"name\": \"saving the world\"}, {\"id\"...                en   \n",
       "56    [{\"id\": 9663, \"name\": \"sequel\"}, {\"id\": 9743, ...                en   \n",
       "66    [{\"id\": 965, \"name\": \"age difference\"}, {\"id\":...                en   \n",
       "78    [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
       "128   [{\"id\": 588, \"name\": \"rome\"}, {\"id\": 716, \"nam...                en   \n",
       "...                                                 ...               ...   \n",
       "4790                                                 []                fa   \n",
       "4794                                                 []                en   \n",
       "4797                                                 []                en   \n",
       "4800  [{\"id\": 248, \"name\": \"date\"}, {\"id\": 699, \"nam...                en   \n",
       "4802  [{\"id\": 1523, \"name\": \"obsession\"}, {\"id\": 224...                en   \n",
       "\n",
       "                    original_title  \\\n",
       "10                Superman Returns   \n",
       "56                Star Trek Beyond   \n",
       "66                              Up   \n",
       "78                 The Jungle Book   \n",
       "128                Angels & Demons   \n",
       "...                            ...   \n",
       "4790                         دایره   \n",
       "4794  Sanctuary: Quite a Conundrum   \n",
       "4797                        Cavite   \n",
       "4800     Signed, Sealed, Delivered   \n",
       "4802             My Date with Drew   \n",
       "\n",
       "                                               overview  popularity  \\\n",
       "10    Superman returns to discover his 5-year absenc...   57.925623   \n",
       "56    The USS Enterprise crew explores the furthest ...   65.352913   \n",
       "66    Carl Fredricksen spent his entire life dreamin...   92.201962   \n",
       "78    After a threat from the tiger Shere Khan force...   94.199316   \n",
       "128   Harvard symbologist Robert Langdon investigate...   67.447636   \n",
       "...                                                 ...         ...   \n",
       "4790  Various women struggle to function in the oppr...    1.193779   \n",
       "4794  It should have been just a normal day of sex, ...    0.166513   \n",
       "4797  Adam, a security guard, travels from Californi...    0.022173   \n",
       "4800  \"Signed, Sealed, Delivered\" introduces a dedic...    1.444476   \n",
       "4802  Ever since the second grade when he first saw ...    1.929883   \n",
       "\n",
       "                                   production_companies  \\\n",
       "10    [{\"name\": \"DC Comics\", \"id\": 429}, {\"name\": \"L...   \n",
       "56    [{\"name\": \"Paramount Pictures\", \"id\": 4}, {\"na...   \n",
       "66       [{\"name\": \"Pixar Animation Studios\", \"id\": 3}]   \n",
       "78    [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...   \n",
       "128   [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...   \n",
       "...                                                 ...   \n",
       "4790  [{\"name\": \"Jafar Panahi Film Productions\", \"id...   \n",
       "4794  [{\"name\": \"Gold Lion Films\", \"id\": 37870}, {\"n...   \n",
       "4797                                                 []   \n",
       "4800  [{\"name\": \"Front Street Pictures\", \"id\": 3958}...   \n",
       "4802  [{\"name\": \"rusty bear entertainment\", \"id\": 87...   \n",
       "\n",
       "                                   production_countries release_date  \\\n",
       "10    [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2006-06-28   \n",
       "56    [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2016-07-07   \n",
       "66    [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2009-05-13   \n",
       "78    [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...   2016-04-07   \n",
       "128   [{\"iso_3166_1\": \"IT\", \"name\": \"Italy\"}, {\"iso_...   2009-05-13   \n",
       "...                                                 ...          ...   \n",
       "4790             [{\"iso_3166_1\": \"IR\", \"name\": \"Iran\"}]   2000-09-08   \n",
       "4794  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-01-20   \n",
       "4797                                                 []   2005-03-12   \n",
       "4800  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2013-10-13   \n",
       "4802  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2005-08-05   \n",
       "\n",
       "        revenue  runtime                                   spoken_languages  \\\n",
       "10    391081192    154.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...   \n",
       "56    343471816    122.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "66    735099082     96.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "78    966550600    106.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "128   356613439    138.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...   \n",
       "...         ...      ...                                                ...   \n",
       "4790          0     90.0  [{\"iso_639_1\": \"fa\", \"name\": \"\\u0641\\u0627\\u06...   \n",
       "4794          0     82.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4797          0     80.0                                                 []   \n",
       "4800          0    120.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4802          0     90.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "\n",
       "        status tagline                         title  vote_average  vote_count  \n",
       "10    Released     NaN              Superman Returns           5.4        1400  \n",
       "56    Released     NaN              Star Trek Beyond           6.6        2568  \n",
       "66    Released     NaN                            Up           7.7        6870  \n",
       "78    Released     NaN               The Jungle Book           6.7        2892  \n",
       "128   Released     NaN               Angels & Demons           6.5        2129  \n",
       "...        ...     ...                           ...           ...         ...  \n",
       "4790  Released     NaN                    The Circle           6.6          17  \n",
       "4794  Released     NaN  Sanctuary: Quite a Conundrum           0.0           0  \n",
       "4797  Released     NaN                        Cavite           7.5           2  \n",
       "4800  Released     NaN     Signed, Sealed, Delivered           7.0           6  \n",
       "4802  Released     NaN             My Date with Drew           6.3          16  \n",
       "\n",
       "[844 rows x 20 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['tagline'].isna()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-15T12:29:36.177475Z",
     "start_time": "2020-12-15T12:29:36.173500Z"
    }
   },
   "source": [
    "# 补全数据\n",
    "## 进行补全的特征\n",
    "### release_date\n",
    "\n",
    "- America Is Still the Place : 2014-06-01\n",
    "\n",
    "### runtime\n",
    "- Chiamatemi Francesco - Il Papa della gente : 98.0\n",
    "- To Be Frank, Sinatra at 100 : 81.0\n",
    "\n",
    "## 不进行补全的特征\n",
    "- homepage 影片主页\n",
    "- tagline 宣传语"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-15T12:40:39.179748Z",
     "start_time": "2020-12-15T12:40:39.166440Z"
    }
   },
   "outputs": [],
   "source": [
    "data.loc[4553,['release_date']] = \"2014-06-01\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-15T12:41:58.161345Z",
     "start_time": "2020-12-15T12:41:58.148848Z"
    }
   },
   "outputs": [],
   "source": [
    "data.loc[2656,['runtime']] = 98.0\n",
    "data.loc[4140,['runtime']] = 81.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据类型转换\n",
    "\n",
    "### datetime\n",
    "- release_date\n",
    "\n",
    "### json\n",
    "\n",
    "- genres\n",
    "- keywords\n",
    "- production_companies\n",
    "- production_countries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:16:33.337681Z",
     "start_time": "2020-12-16T07:16:33.319065Z"
    }
   },
   "outputs": [],
   "source": [
    "data['release_date'] = pd.to_datetime(data['release_date'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:16:34.637029Z",
     "start_time": "2020-12-16T07:16:34.206152Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "1       [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "2       [{'iso_3166_1': 'GB', 'name': 'United Kingdom'...\n",
       "3       [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "4       [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "                              ...                        \n",
       "4798    [{'iso_3166_1': 'MX', 'name': 'Mexico'}, {'iso...\n",
       "4799                                                   []\n",
       "4800    [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "4801    [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "4802    [{'iso_3166_1': 'US', 'name': 'United States o...\n",
       "Name: production_countries, Length: 4803, dtype: object"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['genres'].map(eval)\n",
    "data['keywords'].map(eval)\n",
    "data['production_companies'].map(eval)\n",
    "data['production_countries'].map(eval)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 筛选数据\n",
    "## 去掉票房、预算、受欢迎程度、评分为均为0的数据\n",
    "## 筛选出评分人数大于5的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:16:42.161550Z",
     "start_time": "2020-12-16T07:16:42.134048Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>popularity</th>\n",
       "      <th>revenue</th>\n",
       "      <th>budget</th>\n",
       "      <th>vote_average</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4803.000000</td>\n",
       "      <td>4.803000e+03</td>\n",
       "      <td>4.803000e+03</td>\n",
       "      <td>4803.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>21.492301</td>\n",
       "      <td>8.226064e+07</td>\n",
       "      <td>2.904504e+07</td>\n",
       "      <td>6.092172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>31.816650</td>\n",
       "      <td>1.628571e+08</td>\n",
       "      <td>4.072239e+07</td>\n",
       "      <td>1.194612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>4.668070</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>7.900000e+05</td>\n",
       "      <td>5.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>12.921594</td>\n",
       "      <td>1.917000e+07</td>\n",
       "      <td>1.500000e+07</td>\n",
       "      <td>6.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>28.313505</td>\n",
       "      <td>9.291719e+07</td>\n",
       "      <td>4.000000e+07</td>\n",
       "      <td>6.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>875.581305</td>\n",
       "      <td>2.787965e+09</td>\n",
       "      <td>3.800000e+08</td>\n",
       "      <td>10.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        popularity       revenue        budget  vote_average\n",
       "count  4803.000000  4.803000e+03  4.803000e+03   4803.000000\n",
       "mean     21.492301  8.226064e+07  2.904504e+07      6.092172\n",
       "std      31.816650  1.628571e+08  4.072239e+07      1.194612\n",
       "min       0.000000  0.000000e+00  0.000000e+00      0.000000\n",
       "25%       4.668070  0.000000e+00  7.900000e+05      5.600000\n",
       "50%      12.921594  1.917000e+07  1.500000e+07      6.200000\n",
       "75%      28.313505  9.291719e+07  4.000000e+07      6.800000\n",
       "max     875.581305  2.787965e+09  3.800000e+08     10.000000"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[['popularity','revenue','budget','vote_average']].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:39:05.103395Z",
     "start_time": "2020-12-16T07:39:05.029885Z"
    }
   },
   "outputs": [],
   "source": [
    "data = data[pd.DataFrame([data['revenue'] > 0,data['budget'] > 0,data['popularity'] > 0,data['vote_average'] > 0]).any()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:47:51.438772Z",
     "start_time": "2020-12-16T07:47:51.433691Z"
    }
   },
   "outputs": [],
   "source": [
    "data = data[data['vote_count'] > 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:48:53.807831Z",
     "start_time": "2020-12-16T07:48:53.783982Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>budget</th>\n",
       "      <th>genres</th>\n",
       "      <th>homepage</th>\n",
       "      <th>id</th>\n",
       "      <th>keywords</th>\n",
       "      <th>original_language</th>\n",
       "      <th>original_title</th>\n",
       "      <th>overview</th>\n",
       "      <th>popularity</th>\n",
       "      <th>production_companies</th>\n",
       "      <th>production_countries</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revenue</th>\n",
       "      <th>runtime</th>\n",
       "      <th>spoken_languages</th>\n",
       "      <th>status</th>\n",
       "      <th>tagline</th>\n",
       "      <th>title</th>\n",
       "      <th>vote_average</th>\n",
       "      <th>vote_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>237000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.avatarmovie.com/</td>\n",
       "      <td>19995</td>\n",
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
       "      <td>150.437577</td>\n",
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2009-12-10</td>\n",
       "      <td>2787965087</td>\n",
       "      <td>162.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
       "      <td>Released</td>\n",
       "      <td>Enter the World of Pandora.</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>7.2</td>\n",
       "      <td>11800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>300000000</td>\n",
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
       "      <td>http://disney.go.com/disneypictures/pirates/</td>\n",
       "      <td>285</td>\n",
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
       "      <td>en</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
       "      <td>139.082615</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2007-05-19</td>\n",
       "      <td>961000000</td>\n",
       "      <td>169.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>At the end of the world, the adventure begins.</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>6.9</td>\n",
       "      <td>4500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>245000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://www.sonypictures.com/movies/spectre/</td>\n",
       "      <td>206647</td>\n",
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
       "      <td>en</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
       "      <td>107.376788</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
       "      <td>[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...</td>\n",
       "      <td>2015-10-26</td>\n",
       "      <td>880674609</td>\n",
       "      <td>148.0</td>\n",
       "      <td>[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...</td>\n",
       "      <td>Released</td>\n",
       "      <td>A Plan No One Escapes</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>6.3</td>\n",
       "      <td>4466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>250000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>http://www.thedarkknightrises.com/</td>\n",
       "      <td>49026</td>\n",
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
       "      <td>en</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>Following the death of District Attorney Harve...</td>\n",
       "      <td>112.312950</td>\n",
       "      <td>[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-07-16</td>\n",
       "      <td>1084939099</td>\n",
       "      <td>165.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>The Legend Ends</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>7.6</td>\n",
       "      <td>9106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>260000000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
       "      <td>http://movies.disney.com/john-carter</td>\n",
       "      <td>49529</td>\n",
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
       "      <td>en</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
       "      <td>43.926995</td>\n",
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-03-07</td>\n",
       "      <td>284139100</td>\n",
       "      <td>132.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>Lost in our world, found in another.</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>6.1</td>\n",
       "      <td>2124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4525</th>\n",
       "      <td>7000</td>\n",
       "      <td>[{\"id\": 878, \"name\": \"Science Fiction\"}, {\"id\"...</td>\n",
       "      <td>http://www.primermovie.com</td>\n",
       "      <td>14337</td>\n",
       "      <td>[{\"id\": 1448, \"name\": \"distrust\"}, {\"id\": 2101...</td>\n",
       "      <td>en</td>\n",
       "      <td>Primer</td>\n",
       "      <td>Friends/fledgling entrepreneurs invent a devic...</td>\n",
       "      <td>23.307949</td>\n",
       "      <td>[{\"name\": \"Thinkfilm\", \"id\": 446}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2004-10-08</td>\n",
       "      <td>424760</td>\n",
       "      <td>77.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>What happens if it actually works?</td>\n",
       "      <td>Primer</td>\n",
       "      <td>6.9</td>\n",
       "      <td>658</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4526</th>\n",
       "      <td>220000</td>\n",
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9367</td>\n",
       "      <td>[{\"id\": 5616, \"name\": \"united states\\u2013mexi...</td>\n",
       "      <td>es</td>\n",
       "      <td>El Mariachi</td>\n",
       "      <td>El Mariachi just wants to play his guitar and ...</td>\n",
       "      <td>14.269792</td>\n",
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}]</td>\n",
       "      <td>[{\"iso_3166_1\": \"MX\", \"name\": \"Mexico\"}, {\"iso...</td>\n",
       "      <td>1992-09-04</td>\n",
       "      <td>2040920</td>\n",
       "      <td>81.0</td>\n",
       "      <td>[{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>He didn't come looking for trouble, but troubl...</td>\n",
       "      <td>El Mariachi</td>\n",
       "      <td>6.6</td>\n",
       "      <td>238</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4527</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 35, \"name\": \"Comedy\"}, {\"id\": 18, \"nam...</td>\n",
       "      <td>http://www.hallmarkchannel.com/signedsealeddel...</td>\n",
       "      <td>231617</td>\n",
       "      <td>[{\"id\": 248, \"name\": \"date\"}, {\"id\": 699, \"nam...</td>\n",
       "      <td>en</td>\n",
       "      <td>Signed, Sealed, Delivered</td>\n",
       "      <td>\"Signed, Sealed, Delivered\" introduces a dedic...</td>\n",
       "      <td>1.444476</td>\n",
       "      <td>[{\"name\": \"Front Street Pictures\", \"id\": 3958}...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2013-10-13</td>\n",
       "      <td>0</td>\n",
       "      <td>120.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Signed, Sealed, Delivered</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4528</th>\n",
       "      <td>0</td>\n",
       "      <td>[]</td>\n",
       "      <td>http://shanghaicalling.com/</td>\n",
       "      <td>126186</td>\n",
       "      <td>[]</td>\n",
       "      <td>en</td>\n",
       "      <td>Shanghai Calling</td>\n",
       "      <td>When ambitious New York attorney Sam is sent t...</td>\n",
       "      <td>0.857008</td>\n",
       "      <td>[]</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2012-05-03</td>\n",
       "      <td>0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>A New Yorker in Shanghai</td>\n",
       "      <td>Shanghai Calling</td>\n",
       "      <td>5.7</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4529</th>\n",
       "      <td>0</td>\n",
       "      <td>[{\"id\": 99, \"name\": \"Documentary\"}]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25975</td>\n",
       "      <td>[{\"id\": 1523, \"name\": \"obsession\"}, {\"id\": 224...</td>\n",
       "      <td>en</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>Ever since the second grade when he first saw ...</td>\n",
       "      <td>1.929883</td>\n",
       "      <td>[{\"name\": \"rusty bear entertainment\", \"id\": 87...</td>\n",
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
       "      <td>2005-08-05</td>\n",
       "      <td>0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
       "      <td>Released</td>\n",
       "      <td>NaN</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>6.3</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4530 rows × 20 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         budget                                             genres  \\\n",
       "0     237000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "1     300000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
       "2     245000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "3     250000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
       "4     260000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
       "...         ...                                                ...   \n",
       "4525       7000  [{\"id\": 878, \"name\": \"Science Fiction\"}, {\"id\"...   \n",
       "4526     220000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
       "4527          0  [{\"id\": 35, \"name\": \"Comedy\"}, {\"id\": 18, \"nam...   \n",
       "4528          0                                                 []   \n",
       "4529          0                [{\"id\": 99, \"name\": \"Documentary\"}]   \n",
       "\n",
       "                                               homepage      id  \\\n",
       "0                           http://www.avatarmovie.com/   19995   \n",
       "1          http://disney.go.com/disneypictures/pirates/     285   \n",
       "2           http://www.sonypictures.com/movies/spectre/  206647   \n",
       "3                    http://www.thedarkknightrises.com/   49026   \n",
       "4                  http://movies.disney.com/john-carter   49529   \n",
       "...                                                 ...     ...   \n",
       "4525                         http://www.primermovie.com   14337   \n",
       "4526                                                NaN    9367   \n",
       "4527  http://www.hallmarkchannel.com/signedsealeddel...  231617   \n",
       "4528                        http://shanghaicalling.com/  126186   \n",
       "4529                                                NaN   25975   \n",
       "\n",
       "                                               keywords original_language  \\\n",
       "0     [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...                en   \n",
       "1     [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...                en   \n",
       "2     [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...                en   \n",
       "3     [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...                en   \n",
       "4     [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
       "...                                                 ...               ...   \n",
       "4525  [{\"id\": 1448, \"name\": \"distrust\"}, {\"id\": 2101...                en   \n",
       "4526  [{\"id\": 5616, \"name\": \"united states\\u2013mexi...                es   \n",
       "4527  [{\"id\": 248, \"name\": \"date\"}, {\"id\": 699, \"nam...                en   \n",
       "4528                                                 []                en   \n",
       "4529  [{\"id\": 1523, \"name\": \"obsession\"}, {\"id\": 224...                en   \n",
       "\n",
       "                                original_title  \\\n",
       "0                                       Avatar   \n",
       "1     Pirates of the Caribbean: At World's End   \n",
       "2                                      Spectre   \n",
       "3                        The Dark Knight Rises   \n",
       "4                                  John Carter   \n",
       "...                                        ...   \n",
       "4525                                    Primer   \n",
       "4526                               El Mariachi   \n",
       "4527                 Signed, Sealed, Delivered   \n",
       "4528                          Shanghai Calling   \n",
       "4529                         My Date with Drew   \n",
       "\n",
       "                                               overview  popularity  \\\n",
       "0     In the 22nd century, a paraplegic Marine is di...  150.437577   \n",
       "1     Captain Barbossa, long believed to be dead, ha...  139.082615   \n",
       "2     A cryptic message from Bond’s past sends him o...  107.376788   \n",
       "3     Following the death of District Attorney Harve...  112.312950   \n",
       "4     John Carter is a war-weary, former military ca...   43.926995   \n",
       "...                                                 ...         ...   \n",
       "4525  Friends/fledgling entrepreneurs invent a devic...   23.307949   \n",
       "4526  El Mariachi just wants to play his guitar and ...   14.269792   \n",
       "4527  \"Signed, Sealed, Delivered\" introduces a dedic...    1.444476   \n",
       "4528  When ambitious New York attorney Sam is sent t...    0.857008   \n",
       "4529  Ever since the second grade when he first saw ...    1.929883   \n",
       "\n",
       "                                   production_companies  \\\n",
       "0     [{\"name\": \"Ingenious Film Partners\", \"id\": 289...   \n",
       "1     [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...   \n",
       "2     [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...   \n",
       "3     [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...   \n",
       "4           [{\"name\": \"Walt Disney Pictures\", \"id\": 2}]   \n",
       "...                                                 ...   \n",
       "4525                 [{\"name\": \"Thinkfilm\", \"id\": 446}]   \n",
       "4526           [{\"name\": \"Columbia Pictures\", \"id\": 5}]   \n",
       "4527  [{\"name\": \"Front Street Pictures\", \"id\": 3958}...   \n",
       "4528                                                 []   \n",
       "4529  [{\"name\": \"rusty bear entertainment\", \"id\": 87...   \n",
       "\n",
       "                                   production_countries release_date  \\\n",
       "0     [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2009-12-10   \n",
       "1     [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2007-05-19   \n",
       "2     [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...   2015-10-26   \n",
       "3     [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-07-16   \n",
       "4     [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-03-07   \n",
       "...                                                 ...          ...   \n",
       "4525  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2004-10-08   \n",
       "4526  [{\"iso_3166_1\": \"MX\", \"name\": \"Mexico\"}, {\"iso...   1992-09-04   \n",
       "4527  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2013-10-13   \n",
       "4528  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-05-03   \n",
       "4529  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2005-08-05   \n",
       "\n",
       "         revenue  runtime                                   spoken_languages  \\\n",
       "0     2787965087    162.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...   \n",
       "1      961000000    169.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "2      880674609    148.0  [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...   \n",
       "3     1084939099    165.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4      284139100    132.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "...          ...      ...                                                ...   \n",
       "4525      424760     77.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4526     2040920     81.0      [{\"iso_639_1\": \"es\", \"name\": \"Espa\\u00f1ol\"}]   \n",
       "4527           0    120.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4528           0     98.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "4529           0     90.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]   \n",
       "\n",
       "        status                                            tagline  \\\n",
       "0     Released                        Enter the World of Pandora.   \n",
       "1     Released     At the end of the world, the adventure begins.   \n",
       "2     Released                              A Plan No One Escapes   \n",
       "3     Released                                    The Legend Ends   \n",
       "4     Released               Lost in our world, found in another.   \n",
       "...        ...                                                ...   \n",
       "4525  Released                 What happens if it actually works?   \n",
       "4526  Released  He didn't come looking for trouble, but troubl...   \n",
       "4527  Released                                                NaN   \n",
       "4528  Released                           A New Yorker in Shanghai   \n",
       "4529  Released                                                NaN   \n",
       "\n",
       "                                         title  vote_average  vote_count  \n",
       "0                                       Avatar           7.2       11800  \n",
       "1     Pirates of the Caribbean: At World's End           6.9        4500  \n",
       "2                                      Spectre           6.3        4466  \n",
       "3                        The Dark Knight Rises           7.6        9106  \n",
       "4                                  John Carter           6.1        2124  \n",
       "...                                        ...           ...         ...  \n",
       "4525                                    Primer           6.9         658  \n",
       "4526                               El Mariachi           6.6         238  \n",
       "4527                 Signed, Sealed, Delivered           7.0           6  \n",
       "4528                          Shanghai Calling           5.7           7  \n",
       "4529                         My Date with Drew           6.3          16  \n",
       "\n",
       "[4530 rows x 20 columns]"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 保存数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-12-16T07:53:19.951193Z",
     "start_time": "2020-12-16T07:53:19.814714Z"
    }
   },
   "outputs": [],
   "source": [
    "data.to_csv('./after_process_tmdb_5000_movies.csv',index=False,encoding = 'utf-8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 完成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1rc1"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
